package com.shujia.spark.core

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Demonstrates the RDD `union` and `distinct` operators on two small
  * in-memory integer datasets.
  */
object Demo09Union {
  /**
    * Builds two integer RDDs, unions them, removes duplicate values and
    * prints every remaining element.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    // Build the Spark configuration: application name "union", master "local".
    val conf: SparkConf = new SparkConf()
      .setAppName("union")
      .setMaster("local")

    // Create the Spark context.
    val sc = new SparkContext(conf)

    // Run the job inside try/finally so the context is always stopped,
    // even when an action throws.
    try {
      // Create the input data from local collections.
      val rdd1: RDD[Int] = sc.parallelize(List(1, 2, 3, 4, 5, 6))
      val rdd2: RDD[Int] = sc.parallelize(List(4, 5, 6, 7, 8, 9))

      /**
        * union: merges two RDDs. Both RDDs must have the same element type;
        * duplicates are NOT removed.
        */
      val unionRDD: RDD[Int] = rdd1.union(rdd2)

      /**
        * distinct: removes duplicate elements.
        */
      val distinctRDD: RDD[Int] = unionRDD.distinct()

      // Action: triggers the computation and prints each element.
      distinctRDD.foreach(println)
    } finally {
      // Shut down the SparkContext that was created above.
      sc.stop()
    }
  }
}
